## master preamble ------------------------------------------------------
rm(list=ls(all=TRUE)) 

## inherit directories from stata -----------
args = commandArgs(trailingOnly = "TRUE")
if (length(args)) {
  dir.proj = paste0(args[1],'/')
  dir.lib  = args[2]
} 

## if loading from library -------------------
if(dir.lib!='') {
  ## load depenencies --------------------
  library(R.utils,lib.loc=dir.lib)
  library(ggplot2,lib.loc=dir.lib)
  library(tidyr,lib.loc=dir.lib)
  library(stringr,lib.loc=dir.lib)
  library(forcats,lib.loc=dir.lib)
  library(lubridate,lib.loc=dir.lib)
  library(haven,lib.loc=dir.lib)
  ## load main packages ------------------
  library(rio,lib.loc=dir.lib)
  library(dplyr,lib.loc=dir.lib)
  library(tidyverse,lib.loc=dir.lib)
  library(fixest,lib.loc=dir.lib)
## otherwise: simple load -----------------
} else {
  library(rio)
  library(dplyr)
  library(tidyverse)
  library(fixest)
}
## --------------------------------------------------------------------


## file preamble ------------------------------------------------------
## set randomization seed --------------
set.seed(661)
## directories --------
dir.data = paste0(dir.proj,'data/')
dir.est  = paste0(dir.proj,'estimates/')
dir.out  = paste0(dir.proj,'output/')
dir.temp = paste0(dir.proj,'_temp/')
## varlist -----------
Y.name   = 'cite_ny1'
cov.list = c('female','age','agesq','age_miss',
             'race_b','race_h','race_o','race_u','priorprison','local',
             'logzipincome','zipincome_miss','logprice','veh_miss',
             'speed_py1','other_py1','crash_py1')
## --------------------------------------------------------------------



#############################
### Function: residuals --------------------------------------------------------
get.resid <- function(
    data,Yname,Zname,Cov,FE,Judge,bw,deg=0,grid=1000) {
  
  ### Setup data ------------------------------
  data = dplyr::mutate(
    as.data.frame(data),
    Y = data[[Yname]],
    Z = data[[Zname]],
    jid = data[[Judge]])
  data = dplyr::select(
    data,
    Y,Z,all_of(Cov),all_of(FE),jid)
  
  ### Formulas ----------------------
  fe.form <- paste(FE,collapse='+')
  if(!is.null(Cov)) {
    cov.form <- paste(Cov,collapse='+') }
  else(cov.form <- '0')
  Y.form = as.formula(glue::glue(paste('Y~',cov.form,'|',fe.form)))
  Z.form = as.formula(glue::glue(paste('Z~',cov.form,'|',fe.form)))
  
  ### Get residuals ---------------
  rY = fixest::feols(Y.form,data)$residuals
  rZ = fixest::feols(Z.form,data)$residuals 
  
  ### Flexible fit ---------------
  fit = KernSmooth::locpoly(rZ,rY,bandwidth=bw,degree=deg,gridsize=grid)
  
  ### Compute residuals -----------
  fit.function = approxfun(fit$x,fit$y)
  uhat = rY-fit.function(rZ)
  
  ### Data frame with rY,rZ,uhat:
  out.data = data.frame(
    jid=data$jid,rY=rY,rZ=rZ,uhat=uhat,row.names=NULL)
  
  ### Data frame with kernel fit:
  out.fit = data.frame(x=fit$x,y=fit$y,row.names=NULL)
  
  ### Return data ---------------
  return(list(
    data = out.data,
    fit = out.fit))
}
### ----------------------------------------------------------------------------


#########################
### Setup data -----------------------------------------------------------------
main <- import(paste0(dir.data,'out/4-main.dta'))
main <- mutate(
  main,
  Y = main[[Y.name]],
  D = harsh,
  jid = officerid)
main <- dplyr::select(
  main,
  Y,D,Z,all_of(cov.list),totfe,jid)
### ----------------------------------------------------------------------------


##########################
## Full Sample FLL Fit Test ----------------------------------------------------

## Fit outcome to propensity --------------
resid = get.resid(main,Yname='Y',Zname='Z',Cov=cov.list,FE='totfe',Judge='jid',bw=0.01)

### Fit residuals to judge effects --------
main <- mutate(main,uhat=resid$data$uhat)
judge.fit = feols(uhat ~ 0 + i(jid),main)

### Joint test ----------------------------
judge.test = wald(judge.fit,print=F)

### Dataset that summarizes test 1 -------
out.full = dplyr::bind_rows(
  resid$data,
  resid$fit,
  data.frame(Fstat=judge.test$stat,pval=judge.test$p))


##########################
## Summarize officer fits ------------------------------------------------------
out.judge = data.frame(
  jid = as.numeric(str_replace(names(judge.fit$coefficients),'jid::','')),
  b   = judge.fit$coeftable[,1],
  s   = judge.fit$coeftable[,2],
  p   = judge.fit$coeftable[,4],row.names=NULL)
## Rank on p-values ------------
out.judge = mutate(arrange(out.judge,p),rank=seq(1:nrow(out.judge)))
## Add auxilary info -----------
out.judge = inner_join(
  out.judge,
  summarize(group_by(resid$data,jid),N=n(),rZ=mean(rZ,na.rm=T)))


##########################
## Repeat test with dropped officers -------------------------------------------
## Benchmark: Drop top 150 officers in terms of p-values -----------------

## New data on subset of officers ---------
main = inner_join(
  main,
  dplyr::select(filter(out.judge,rank>235),jid))

## Fit outcome to propensity --------------
resid = get.resid(main,Yname='Y',Zname='Z',Cov=cov.list,FE='totfe',Judge='jid',bw=0.01)

### Fit residuals to judge effects --------
main <- mutate(main,uhat=resid$data$uhat)
judge.fit = feols(uhat ~ 0 + i(jid),main)

### Joint test ----------------------------
judge.test = wald(judge.fit,print=F)

### Dataset that summarizes subset test -------
out.sub = dplyr::bind_rows(
  resid$data,
  resid$fit,
  data.frame(Fstat=judge.test$stat,pval=judge.test$p))


##########################
## Export info for stata -----------------------
export(out.full,paste0(dir.est,'fll_full.dta'))
export(out.sub,paste0(dir.est,'fll_sub.dta'))
export(out.judge,paste0(dir.est,'fll_judges.dta'))


